load("./estimation_stuff/hs_characteristics")

load("./output/results_df_baseline")
results_baseline<-results_df
results_baseline$scenario<-"baseline"

results_baseline_long_sem1<-subset(results_baseline, select=c("pid", hs_characteristics, "sex", "race", "hsgpa_level", "mu_s", "mu_y","sem1_s_model", "sem1_s_not_i_model", "sem1_n_friends", "sem1_y_model", "sem1_y_growth", "scenario"))
results_baseline_long_sem1$sem<-1
names(results_baseline_long_sem1)<-sub("sem1_", "", names(results_baseline_long_sem1))

results_baseline_long_sem2<-subset(results_baseline, select=c("pid", hs_characteristics, "sex", "race", "hsgpa_level", "mu_s", "mu_y","sem2_s_model", "sem2_s_not_i_model", "sem2_n_friends", "sem2_y_model", "sem2_y_growth", "scenario") )
results_baseline_long_sem2$sem<-2
names(results_baseline_long_sem2)<-sub("sem2_", "", names(results_baseline_long_sem2))

### stack results across the semesters
results_baseline_both_sem<-rbind(results_baseline_long_sem1, results_baseline_long_sem2)

sd_y_baseline<-sd(results_baseline_both_sem$y_model, na.rm=T)
sd_y_growth_baseline<-sd(results_baseline_both_sem$y_model, na.rm=T)
sd_s_baseline<-sd(results_baseline_both_sem$s_model, na.rm=T)
sd_s_not_i_baseline<-sd(results_baseline_both_sem$s_not_i_model, na.rm=T)

load("./output/results_df_cf")
results_cf<-results_df
results_cf$scenario<-"cf"

results_cf_long_sem1<-subset(results_cf, select=c("pid", hs_characteristics, "sex", "race", "hsgpa_level", "mu_s", "mu_y","sem1_s_model", "sem1_s_not_i_model", "sem1_n_friends",  "sem1_y_model", "sem1_y_growth", "scenario"))
results_cf_long_sem1$sem<-1
names(results_cf_long_sem1)<-sub("sem1_", "", names(results_cf_long_sem1))

results_cf_long_sem2<-subset(results_cf, select=c("pid", hs_characteristics, "sex", "race", "hsgpa_level", "mu_s", "mu_y","sem2_s_model", "sem2_s_not_i_model", "sem2_n_friends", "sem2_y_model", "sem2_y_growth", "scenario") )
results_cf_long_sem2$sem<-2
names(results_cf_long_sem2)<-sub("sem2_", "", names(results_cf_long_sem2))

### stack results across the semesters
  results_cf_both_sem<-rbind(results_cf_long_sem1, results_cf_long_sem2)
  
  results_diff_both_sem<-subset(results_baseline_both_sem, select=c("pid", hs_characteristics, "sex", "race", "hsgpa_level", "mu_s", "mu_y", "sem"))
  results_diff_both_sem$y_diff<- results_cf_both_sem$y_model - results_baseline_both_sem$y_model
  results_diff_both_sem$y_growth_diff<- results_cf_both_sem$y_growth - results_baseline_both_sem$y_growth
  results_diff_both_sem$s_diff<- results_cf_both_sem$s_model - results_baseline_both_sem$s_model
  results_diff_both_sem$s_not_i_diff<- results_cf_both_sem$s_not_i_model - results_baseline_both_sem$s_not_i_model

### standardize results by expressing in baseline standard deviations
  results_diff_both_sem$y_diff_stand<-results_diff_both_sem$y_diff / sd(y_sim_array, na.rm=T)
  results_diff_both_sem$y_growth_diff_stand<-results_diff_both_sem$y_growth_diff / sqrt(sd(y_sim_array, na.rm=T)^2 - sd(solution$mu_y_model, na.rm=T)^2)
  results_diff_both_sem$s_diff_stand<-results_diff_both_sem$s_diff / sd(s_sim_array, na.rm=T)
  results_diff_both_sem$s_not_i_diff_stand<-results_diff_both_sem$s_not_i_diff / sd(s_not_i_sim_array, na.rm=T)

  tikz("./output/plot_y_diff.tex", standAlone = F)
    ggplot(data=results_diff_both_sem, aes(x=y_diff, color=sex, linetype=race)) + stat_ecdf() + theme(legend.position="bottom")+ labs(y="", x="change in achievement")+ my_theme
  dev.off()
  
tikz("./output/plot_s_diff.tex", standAlone = F)
  ggplot(data=results_diff_both_sem, aes(x=s_diff, color=sex, linetype=race)) + stat_ecdf() + theme_bw() + theme(legend.position="bottom") + labs(y="", x="change in own study time")+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
dev.off()

tikz("./output/plot_s_not_i_diff.tex", standAlone = F)
  ggplot(data=results_diff_both_sem, aes(x=s_not_i_diff, color=sex, linetype=race)) + stat_ecdf() + theme_bw() + theme(legend.position="bottom")+ labs(y="", x="change in friend study time")+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
dev.off()

  y_diff_table<-with(results_diff_both_sem, t(cbind(total=mean(y_diff, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$y_model, na.rm=T)/sd(results_baseline_both_sem$y_model, na.rm=T)-1),t(tapply(y_diff, list(race), mean, na.rm=T)), t(tapply(y_diff, list(sex), mean, na.rm=T)), t(tapply(y_diff, list(hsgpa_level), mean, na.rm=T)))))

  y_growth_diff_table<-with(results_diff_both_sem, t(cbind(total=mean(y_growth_diff, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$y_growth, na.rm=T)/sd(results_baseline_both_sem$y_growth, na.rm=T)-1), t(tapply(y_growth_diff, list(race), mean, na.rm=T)), t(tapply(y_growth_diff, list(sex), mean, na.rm=T)), t(tapply(y_growth_diff, list(hsgpa_level), mean, na.rm=T)))))

  s_diff_table<-with(results_diff_both_sem, t(cbind(total=mean(s_diff, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$s_model, na.rm=T)/sd(results_baseline_both_sem$s_model, na.rm=T)-1), t(tapply(s_diff, list(race), mean, na.rm=T)), t(tapply(s_diff, list(sex), mean, na.rm=T)), t(tapply(s_diff, list(hsgpa_level), mean, na.rm=T)))))

  s_not_i_diff_table<-with(results_diff_both_sem, t(cbind(total=mean(s_not_i_diff, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$s_not_i_model, na.rm=T)/sd(results_baseline_both_sem$s_not_i_model, na.rm=T)-1), t(tapply(s_not_i_diff, list(race), mean, na.rm=T)),  t(tapply(s_not_i_diff, list(sex), mean, na.rm=T)), t(tapply(s_not_i_diff, list(hsgpa_level), mean, na.rm=T)))))

## split by semester
  y_diff_by_sem_table<-with(results_diff_both_sem, t(cbind(total=tapply(y_diff, sem, mean, na.rm=T),  t(tapply(y_diff, list(race, sem), mean, na.rm=T)), t(tapply(y_diff, list(sex,sem), mean, na.rm=T)), t(tapply(y_diff, list(hsgpa_level,sem), mean, na.rm=T)))))

  s_diff_by_sem_table<-with(results_diff_both_sem, t(cbind(total=tapply(s_diff, sem, mean, na.rm=T),  t(tapply(s_diff, list(race, sem), mean, na.rm=T)), t(tapply(s_diff, list(sex,sem), mean, na.rm=T)), t(tapply(s_diff, list(hsgpa_level,sem), mean, na.rm=T)))))

  s_not_i_diff_by_sem_table<-with(results_diff_both_sem, t(cbind(total=tapply(s_not_i_diff, sem, mean, na.rm=T),  t(tapply(s_not_i_diff, list(race, sem), mean, na.rm=T)), t(tapply(s_not_i_diff, list(sex,sem), mean, na.rm=T)), t(tapply(s_not_i_diff, list(hsgpa_level,sem), mean, na.rm=T)))))

### standardized by dividing by model SD
  y_diff_stand_table<-with(results_diff_both_sem, t(cbind(total=mean(y_diff_stand, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$y_model, na.rm=T)/sd(results_baseline_both_sem$y_model, na.rm=T)-1),t(tapply(y_diff_stand, list(race), mean, na.rm=T)), t(tapply(y_diff_stand, list(sex), mean, na.rm=T)), t(tapply(y_diff_stand, list(hsgpa_level), mean, na.rm=T)))))
  
  y_growth_diff_stand_table<-with(results_diff_both_sem, t(cbind(total=mean(y_growth_diff_stand, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$y_growth, na.rm=T)/sd(results_baseline_both_sem$y_growth, na.rm=T)-1), t(tapply(y_growth_diff_stand, list(race), mean, na.rm=T)), t(tapply(y_growth_diff_stand, list(sex), mean, na.rm=T)), t(tapply(y_growth_diff_stand, list(hsgpa_level), mean, na.rm=T)))))

  s_diff_stand_table<-with(results_diff_both_sem, t(cbind(total=mean(s_diff_stand, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$s_model, na.rm=T)/sd(results_baseline_both_sem$s_model, na.rm=T)-1), t(tapply(s_diff_stand, list(race), mean, na.rm=T)), t(tapply(s_diff_stand, list(sex), mean, na.rm=T)), t(tapply(s_diff_stand, list(hsgpa_level), mean, na.rm=T)))))
  
  s_not_i_diff_stand_table<-with(results_diff_both_sem, t(cbind(total=mean(s_not_i_diff_stand, na.rm=T),  "pct. chng sd"=(sd(results_cf_both_sem$s_not_i_model, na.rm=T)/sd(results_baseline_both_sem$s_not_i_model, na.rm=T)-1), t(tapply(s_not_i_diff_stand, list(race), mean, na.rm=T)),  t(tapply(s_not_i_diff_stand, list(sex), mean, na.rm=T)), t(tapply(s_not_i_diff_stand, list(hsgpa_level), mean, na.rm=T)))))

  stargazer(y_diff_table, title="Change in achievement, in sd", out="./output/y_diff_table.tex")
  stargazer(y_growth_diff_table, title="Change in growth of achievement, in sd", out="./output/y_growth_diff_table.tex")
  stargazer(s_diff_table, title="Change in study time, in sd", out="./output/s_diff_table.tex")
  stargazer(s_not_i_diff_table, title="Change in friend study time, in sd", out="./output/s_not_i_diff_table.tex")

  ## TABLE 6
  all_diff_tables<-round(data.frame(y_diff=y_diff_table, y_growth_diff = y_growth_diff_table, s_diff=s_diff_table, s_not_i_diff=s_not_i_diff_table), 3)
  write.csv(all_diff_tables, file="./output/all_diff_tables.csv")
  stargazer(all_diff_tables, title="Change in outcomes", out="./output/all_diff_tables.tex")
  print(all_diff_tables)

  all_diff_by_sem_tables<-round(data.frame(y_diff=y_diff_by_sem_table, s_diff=s_diff_by_sem_table, s_not_i_diff=s_not_i_diff_by_sem_table), 3)
  write.csv(all_diff_by_sem_tables, file="./output/all_diff_by_sem_tables.csv")
  stargazer(all_diff_by_sem_tables, title="Change in outcomes", out="./output/all_diff_by_sem_tables.tex")

all_diff_stand_tables<-round(data.frame(y_diff_stand=y_diff_stand_table, y_growth_diff = y_growth_diff_stand_table, s_diff_stand=s_diff_stand_table, s_not_i_diff_stand=s_not_i_diff_stand_table), 3)
write.csv(all_diff_stand_tables, file="./output/all_diff_stand_tables.csv")
stargazer(all_diff_stand_tables, title="Change in outcomes, in sd", out="./output/all_diff_stand_tables.tex")
print(all_diff_stand_tables)

  ggplot(data=rbind(results_baseline_both_sem, results_cf_both_sem), aes(x=y_growth, color=scenario)) + geom_density() + theme_bw() + theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
  ggsave("./output/y_growth_base_cf.pdf")
  
  ggplot(data=rbind(results_baseline_both_sem, results_cf_both_sem), aes(x=y_growth, color=scenario)) + geom_density() + theme_bw() + facet_grid(race ~ sex)+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
  ggsave("./output/y_growth_base_cf_by_race_sex.pdf")

  ggplot(data=rbind(results_baseline_both_sem, results_cf_both_sem), aes(x=y_model, color=scenario)) + geom_density() + theme_bw()+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left") 
ggsave("./output/y_model_base_cf.pdf")
  
  ggplot(data=rbind(results_baseline_both_sem, results_cf_both_sem), aes(x=y_model, color=scenario)) + geom_density() + theme_bw() + facet_grid(race ~ sex)+ theme(legend.position="bottom", legend.key = element_blank(),legend.box.just = "left")
ggsave("./output/y_model_base_cf_by_race_sex.pdf")

## plot results by study type percentile
  mu_s_lim<-quantile(results_diff_both_sem$mu_s, c(0.00,0.95), na.rm=T)  
  y_diff_lim<-quantile(results_diff_both_sem$y_diff, c(0.01,0.99), na.rm=T)
  mu_s_ordered_df<-results_diff_both_sem[order(results_diff_both_sem$mu_s),]
  mu_s_ordered_df$mu_s_pctile<-(1:length(results_diff_both_sem$mu_s))/length(results_diff_both_sem$mu_s)
  
  tikz("./output/plot_y_diff_mu_s.tex", standAlone = F)
    ggplot(data=mu_s_ordered_df, aes(x=mu_s_pctile, y=y_diff, color=NULL)) + geom_smooth(se=F) + geom_point(alpha=0.15) + coord_cartesian(xlim=0:1, ylim=y_diff_lim) + labs(x="percentile of estimated study type", y="change in GPA", title="")+ my_theme
    #ggsave("./output/y_diff_mu_s.pdf")
  dev.off()
  
  tikz("./output/plot_y_diff_mu_s_race.tex", standAlone = F)
    ggplot(data=mu_s_ordered_df, aes(x=mu_s_pctile, y=y_diff, color=race, linetype=race)) + geom_smooth(se=F) + geom_point(alpha=0.15) + coord_cartesian(xlim=0:1, ylim=y_diff_lim) + labs(x="percentile of estimated study type", y="change in GPA", title="By race")+ my_theme
    #ggsave("./output/y_diff_mu_s_race.pdf")
  dev.off()
  
  tikz("./output/plot_y_diff_mu_s_sex.tex", standAlone = F)
    ggplot(data=mu_s_ordered_df, aes(x=mu_s_pctile, y=y_diff, color=sex, linetype=sex)) + geom_smooth(se=F) + geom_point(alpha=0.15) + coord_cartesian(xlim=0:1, ylim=y_diff_lim) + labs(x="percentile of estimated study type", y="change in GPA", title="By sex")+ my_theme
  #ggsave("./output/y_diff_mu_s_sex.pdf")
  dev.off()

  tikz("./output/plot_y_diff_mu_s_hsgpa.tex", standAlone = F)
    ggplot(data=mu_s_ordered_df, aes(x=mu_s_pctile, y=y_diff, color=hsgpa_level, linetype=hsgpa_level)) + geom_smooth(se=F) + geom_point(alpha=0.15) + coord_cartesian(xlim=0:1, ylim=y_diff_lim) + labs(x="percentile of estimated study type", y="change in GPA", title="By HS GPA")+ my_theme + scale_color_discrete(name="HS GPA level") + scale_linetype_discrete(name="HS GPA level")
    #ggsave("./output/y_diff_mu_s_hsgpa.pdf")
  dev.off()

mu_s_ordered_df<-mu_s_ordered_df[order(mu_s_ordered_df$mu_y),]
mu_s_ordered_df$mu_y_pctile<-(1:length(results_diff_both_sem$mu_s))/length(results_diff_both_sem$mu_s)
mu_s_ordered_df<-mu_s_ordered_df[order(mu_s_ordered_df$mu_s),]

ggplot(mu_s_ordered_df, aes(x=mu_s_pctile, y=mu_y_pctile)) + stat_density2d(bins=70,aes(color = ..level..))  + theme_bw()+ labs(x="percentile of estimated study type", y="percentile of estimated human capital type")+ scale_color_continuous(name  ="density")
ggsave("./output/type_density.pdf")

  tikz('./output/plot_type_scatter.tex', standAlone=F)
    ggplot(data=subset(results_diff_both_sem, sem==1), aes(x=mu_s, y=mu_y)) + geom_point(alpha=0.15) + coord_cartesian(xlim=mu_s_lim) + labs(x="estimated study type, $\\hat{\\mu}_s$", y="$\\hat{\\mu}_y$") + geom_smooth(se=F, method="lm") + my_theme
    #ggsave("./output/type_scatter.pdf")
  dev.off()

### anchor in meaningful metrics
  source("map_gpa_to_wage_grad.R")
